#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
library(vegan)
library(grid)
library(gridExtra)
# Load biom file. 
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
biom
# Load and merge sample metadata with read data (biom). 
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
sample_data(biom) <- sam.data
sample_data(biom)
## Sample Data:        [52 samples by 6 sample variables]:
##        TreatmentGroup  Site Date                       Description
## C178N1          Early North  178 Sample of day 178 at site North 1
## C178P1          Early Point  178 Sample of day 178 at site Point 1
## C185P2          Early Point  185 Sample of day 185 at site Point 2
## C206N2           Late North  206 Sample of day 206 at site North 2
## C206P1           Late Point  206 Sample of day 206 at site Point 1
## C206P2           Late Point  206 Sample of day 206 at site Point 2
## C214P1           Late Point  214 Sample of day 214 at site Point 1
## C214P2           Late Point  214 Sample of day 214 at site Point 2
## C214P3           Late Point  214 Sample of day 214 at site Point 3
## C214S1           Late South  214 Sample of day 214 at site South 1
## C214S2           Late South  214 Sample of day 214 at site South 2
## C214S3           Late South  214 Sample of day 214 at site South 3
## C185P1          Early Point  185 Sample of day 185 at site Point 1
## C185P3          Early Point  185 Sample of day 185 at site Point 3
## C199P3           Late Point  199 Sample of day 199 at site Point 3
## C199S2           Late South  199 Sample of day 199 at site South 2
## C199S3           Late South  199 Sample of day 199 at site South 3
## C206N1           Late North  206 Sample of day 206 at site North 1
## C178P2          Early Point  178 Sample of day 178 at site Point 2
## C199N3           Late North  199 Sample of day 199 at site North 3
## C206S1           Late South  206 Sample of day 206 at site South 1
## C214N3           Late North  214 Sample of day 214 at site North 3
## C199N2           Late North  199 Sample of day 199 at site North 2
## C206N3           Late North  206 Sample of day 206 at site North 3
## C206S2           Late South  206 Sample of day 206 at site South 2
## C199N1           Late North  199 Sample of day 199 at site North 1
## C199P1           Late Point  199 Sample of day 199 at site Point 1
## C199S1           Late South  199 Sample of day 199 at site South 1
## C214N1           Late North  214 Sample of day 214 at site North 1
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C199P2           Late Point  199 Sample of day 199 at site Point 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172S3          Early South  172 Sample of day 172 at site South 3
## C178S2          Early South  178 Sample of day 178 at site South 2
## C178P3          Early Point  178 Sample of day 178 at site Point 3
## C178S3          Early South  178 Sample of day 178 at site South 3
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172S1          Early South  172 Sample of day 172 at site South 1
## C178N3          Early North  178 Sample of day 178 at site North 3
## C185N2          Early North  185 Sample of day 185 at site North 2
## C185N3          Early North  185 Sample of day 185 at site North 3
## C185S3          Early South  185 Sample of day 185 at site South 3
## C214N2           Late North  214 Sample of day 214 at site North 2
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C185S2          Early South  185 Sample of day 185 at site South 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
## C185N1          Early North  185 Sample of day 185 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C178S1          Early South  178 Sample of day 178 at site South 1
## C185S1          Early South  185 Sample of day 185 at site South 1
## C172S2          Early South  172 Sample of day 172 at site South 2
## C178N2          Early North  178 Sample of day 178 at site North 2
##        SampleID.1  DateSite
## C178N1     C178N1 178 North
## C178P1     C178P1 178 Point
## C185P2     C185P2 185 Point
## C206N2     C206N2 206 North
## C206P1     C206P1 206 Point
## C206P2     C206P2 206 Point
## C214P1     C214P1 214 Point
## C214P2     C214P2 214 Point
## C214P3     C214P3 214 Point
## C214S1     C214S1 214 South
## C214S2     C214S2 214 South
## C214S3     C214S3 214 South
## C185P1     C185P1 185 Point
## C185P3     C185P3 185 Point
## C199P3     C199P3 199 Point
## C199S2     C199S2 199 South
## C199S3     C199S3 199 South
## C206N1     C206N1 206 North
## C178P2     C178P2 178 Point
## C199N3     C199N3 199 North
## C206S1     C206S1 206 South
## C214N3     C214N3 214 North
## C199N2     C199N2 199 North
## C206N3     C206N3 206 North
## C206S2     C206S2 206 South
## C199N1     C199N1 199 North
## C199P1     C199P1 199 Point
## C199S1     C199S1 199 South
## C214N1     C214N1 214 North
## C172P1     C172P1 172 Point
## C199P2     C199P2 199 Point
## C172N3     C172N3 172 North
## C172S3     C172S3 172 South
## C178S2     C178S2 178 South
## C178P3     C178P3 178 Point
## C178S3     C178S3 178 South
## C172N1     C172N1 172 North
## C172S1     C172S1 172 South
## C178N3     C178N3 178 North
## C185N2     C185N2 185 North
## C185N3     C185N3 185 North
## C185S3     C185S3 185 South
## C214N2     C214N2 214 North
## C172P2     C172P2 172 Point
## C185S2     C185S2 185 South
## C172P3     C172P3 172 Point
## C185N1     C185N1 185 North
## C172N2     C172N2 172 North
## C178S1     C178S1 178 South
## C185S1     C185S1 185 South
## C172S2     C172S2 172 South
## C178N2     C178N2 178 North
# Normalize by relative abundance. 
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Ordination plot, k = 3. 
ordNMDS.k3 <- ordinate(biom.relabund, method="NMDS", distance="bray", k=3)
## Run 0 stress 0.07869986 
## Run 1 stress 0.07869949 
## ... New best solution
## ... Procrustes: rmse 0.0002305481  max resid 0.001105219 
## ... Similar to previous best
## Run 2 stress 0.07869962 
## ... Procrustes: rmse 0.0002017883  max resid 0.001019135 
## ... Similar to previous best
## Run 3 stress 0.07869844 
## ... New best solution
## ... Procrustes: rmse 0.0004553477  max resid 0.001521332 
## ... Similar to previous best
## Run 4 stress 0.08334584 
## Run 5 stress 0.07874264 
## ... Procrustes: rmse 0.002511844  max resid 0.01380174 
## Run 6 stress 0.07869964 
## ... Procrustes: rmse 0.0005900723  max resid 0.002571066 
## ... Similar to previous best
## Run 7 stress 0.08334478 
## Run 8 stress 0.07872072 
## ... Procrustes: rmse 0.001339293  max resid 0.005271881 
## ... Similar to previous best
## Run 9 stress 0.07870018 
## ... Procrustes: rmse 0.0006557135  max resid 0.002997354 
## ... Similar to previous best
## Run 10 stress 0.07869808 
## ... New best solution
## ... Procrustes: rmse 0.0001766944  max resid 0.0005861921 
## ... Similar to previous best
## Run 11 stress 0.07869856 
## ... Procrustes: rmse 0.0002775179  max resid 0.001302053 
## ... Similar to previous best
## Run 12 stress 0.08337886 
## Run 13 stress 0.07869809 
## ... Procrustes: rmse 0.0001472413  max resid 0.0003891141 
## ... Similar to previous best
## Run 14 stress 0.07869902 
## ... Procrustes: rmse 0.0003348988  max resid 0.001535 
## ... Similar to previous best
## Run 15 stress 0.07869916 
## ... Procrustes: rmse 0.0003981774  max resid 0.001889146 
## ... Similar to previous best
## Run 16 stress 0.08334287 
## Run 17 stress 0.08804081 
## Run 18 stress 0.07869856 
## ... Procrustes: rmse 0.0002547032  max resid 0.001142475 
## ... Similar to previous best
## Run 19 stress 0.08695913 
## Run 20 stress 0.07906707 
## ... Procrustes: rmse 0.00900781  max resid 0.03814892 
## *** Solution reached
ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "Date") + geom_point(size=2)
ord.k3 + labs(title = "Cladophora, 2014") + theme_bw() + scale_colour_hue(h=c(300, 500))

df = as(sample_data(biom), "data.frame")
d = phyloseq::distance(biom, "bray")
clado.adonis = adonis(d ~ Date*Site, df)
clado.adonis
## 
## Call:
## adonis(formula = d ~ Date * Site, data = df) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Date       5    3.1945 0.63890 17.4143 0.41338  0.001 ***
## Site       2    1.5561 0.77804 21.2068 0.20136  0.001 ***
## Date:Site 10    1.7298 0.17298  4.7148 0.22384  0.001 ***
## Residuals 34    1.2474 0.03669         0.16142           
## Total     51    7.7278                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
biom.rich.est <- estimate_richness(biom, measures = NULL)
biom.rich.est$SampleID.1 <- row.names(biom.rich.est)
biom.rich.est <- merge(biom.rich.est, sam.data, by = "SampleID.1")
biom.rich.est$Date <- as.character(biom.rich.est$Date)
biom.rich.est$Date <- as.numeric(biom.rich.est$Date)
head(biom.rich.est)
##   SampleID.1 Observed    Chao1  se.chao1      ACE   se.ACE  Shannon
## 1     C172N1     5139 6770.782  99.67941 7316.921 48.50633 5.444561
## 2     C172N2     5236 7017.650 107.98761 7420.840 47.98329 5.649933
## 3     C172N3     6890 8499.605  90.45120 9053.801 49.75824 5.980043
## 4     C172P1     2936 4855.835 138.81016 5427.422 47.24285 4.851552
## 5     C172P2     4833 7043.680 130.90171 7619.611 51.59008 5.306940
## 6     C172P3     3750 5099.247  96.43281 5368.421 41.19882 4.824680
##     Simpson InvSimpson    Fisher TreatmentGroup  Site Date
## 1 0.9874631   79.76426  896.0810          Early North  172
## 2 0.9892749   93.23962 1001.0057          Early North  172
## 3 0.9899773   99.77399 1361.3171          Early North  172
## 4 0.9767986   43.10086  520.1978          Early Point  172
## 5 0.9803746   50.95444  904.5831          Early Point  172
## 6 0.9400545   16.68183  655.5148          Early Point  172
##                         Description  DateSite
## 1 Sample of day 172 at site North 1 172 North
## 2 Sample of day 172 at site North 2 172 North
## 3 Sample of day 172 at site North 3 172 North
## 4 Sample of day 172 at site Point 1 172 Point
## 5 Sample of day 172 at site Point 2 172 Point
## 6 Sample of day 172 at site Point 3 172 Point
# Plot observed richness. 
biom.rich.est.obs <- summarySE(biom.rich.est, measurevar="Observed", groupvars=c("Date","Site"))
p.obs <- ggplot(biom.rich.est.obs, aes(x=Date, y=Observed, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=Observed-se, ymax=Observed+se)) + geom_line() + scale_colour_hue(h=c(300, 850))
p.obs + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Plot Shanon index richness. 
biom.rich.est.sha <- summarySE(biom.rich.est, measurevar="Shannon", groupvars=c("Date","Site"))
p.sha <- ggplot(biom.rich.est.sha, aes(x=Date, y=Shannon, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=Shannon-se, ymax=Shannon+se)) + geom_line() + scale_colour_hue(h=c(300, 850))
p.sha + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find top 30 genera and subset biom.relabund. 
sort.genera <- sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Genus"], sum), TRUE)
top.genera <- sort.genera[1:30]
top.genera.list <- names(top.genera)
biom.relabund.subset = subset_taxa(biom.relabund, Genus %in% top.genera.list)
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Genus %in% as.factor(top.genera.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9093 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 9093 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 9093 tips and 9089 internal nodes ]
relabund.top.genera <- psmelt(biom.relabund.subset.taxa)
relabund.top.genera.genus <- relabund.top.genera%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.top.genera.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
## 
##   Sample GenusAbundance TreatmentGroup   Site   Date          Family
##    <chr>          <dbl>         <fctr> <fctr> <fctr>          <fctr>
## 1 C178S2     0.14728018          Early  South    178 Crenotrichaceae
## 2 C206N2     0.07541493           Late  North    206      Thermaceae
## 3 C199S1     0.12408425           Late  South    199 Crenotrichaceae
## 4 C185S1     0.12728636          Early  South    185 Crenotrichaceae
## 5 C199S3     0.10486769           Late  South    199 Crenotrichaceae
## 6 C172S1     0.10894857          Early  South    172   Cytophagaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Summary of genus abundance of top 30 genera. 
relabund.top.genera.genus.est <- summarySE(relabund.top.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.top.genera.genus.est)
##    Site Date         Genus N GenusAbundance           sd           se
## 1 North  172   Armatimonas 3   0.0082179013 0.0037601213 0.0021709070
## 2 North  172  Bdellovibrio 3   0.0023540936 0.0001963925 0.0001133873
## 3 North  172    Cellvibrio 3   0.0058399987 0.0066066072 0.0038143264
## 4 North  172          CM44 3   0.0088329518 0.0005298073 0.0003058844
## 5 North  172    Crenothrix 3   0.0008092387 0.0004823472 0.0002784833
## 6 North  172 Dechloromonas 3   0.0019270556 0.0026318679 0.0015195096
##             ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0164117220
## 4 0.0013161143
## 5 0.0011982168
## 6 0.0065379223
relabund.top.genera.genus.est$Date <- as.character(relabund.top.genera.genus.est$Date)
relabund.top.genera.genus.est$Date <- as.numeric(relabund.top.genera.genus.est$Date)
# Plot summary of genus abundance of top 30 genera. 
p <- ggplot(relabund.top.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(300, 850))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find methanotrophic bacteria by genus. 
methanolist <- read.table(file = "taxa-of-interest/methanos.txt")
methanolist <- as.vector(methanolist$V1)
biom.relabund.methanos <- subset_taxa(biom.relabund, Genus %in% as.factor(methanolist))
biom.relabund.methanos
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 567 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 567 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 567 tips and 566 internal nodes ]
relabund.methanos <- psmelt(biom.relabund.methanos)
relabund.methanos.genus <- relabund.methanos%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
relabund.methanos.genus.est <- summarySE(relabund.methanos.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.methanos.genus.est)
##    Site Date            Genus N GenusAbundance           sd           se
## 1 North  172       Crenothrix 3   8.092387e-04 4.823472e-04 2.784833e-04
## 2 North  172      Methylibium 3   6.600147e-04 3.800437e-04 2.194183e-04
## 3 North  172    Methylocaldum 3   7.717447e-05 5.988775e-05 3.457621e-05
## 4 North  172 Methylomicrobium 3   0.000000e+00 0.000000e+00 0.000000e+00
## 5 North  172     Methylomonas 3   1.097031e-04 2.736128e-06 1.579704e-06
## 6 North  172     Methylosinus 3   4.677898e-05 2.432451e-05 1.404376e-05
##             ci
## 1 1.198217e-03
## 2 9.440809e-04
## 3 1.487694e-04
## 4 0.000000e+00
## 5 6.796919e-06
## 6 6.042543e-05
relabund.methanos.genus.est$Date <- as.character(relabund.methanos.genus.est$Date)
relabund.methanos.genus.est$Date <- as.numeric(relabund.methanos.genus.est$Date)
# Plot summary of genus abundance of methanotrophic genera. 
p <- ggplot(relabund.methanos.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(300, 850))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find all genera
all.genera <- sort(get_taxa_unique(biom.relabund, "Genus"), decreasing=FALSE)
biom.relabund.all.genera <- subset_taxa(biom.relabund, Genus %in% as.factor(all.genera))
biom.relabund.all.genera <- psmelt(biom.relabund.all.genera)
biom.relabund.all.genera.genus <- biom.relabund.all.genera%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
biom.relabund.all.genera.genus.est <- summarySE(biom.relabund.all.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(biom.relabund.all.genera.genus.est)
##    Site Date           Genus N GenusAbundance           sd           se
## 1 North  172             A17 3   1.044934e-05 7.240333e-06 4.180208e-06
## 2 North  172   Achromobacter 3   1.205785e-06 2.088482e-06 1.205785e-06
## 3 North  172 Acidaminobacter 3   2.030206e-05 3.516421e-05 2.030206e-05
## 4 North  172      Acidocella 3   0.000000e+00 0.000000e+00 0.000000e+00
## 5 North  172      Acidovorax 3   4.023211e-05 3.508560e-05 2.025668e-05
## 6 North  172   Acinetobacter 3   1.395043e-04 1.803168e-04 1.041059e-04
##             ci
## 1 1.798598e-05
## 2 5.188076e-06
## 3 8.735273e-05
## 4 0.000000e+00
## 5 8.715747e-05
## 6 4.479317e-04
biom.relabund.all.genera.genus.est$Date <- as.character(biom.relabund.all.genera.genus.est$Date)
biom.relabund.all.genera.genus.est$Date <- as.numeric(biom.relabund.all.genera.genus.est$Date)
p <- ggplot(biom.relabund.all.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(300, 850))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# We want to calculate the total relative abundance of each phylum. 
# "melt" the phyloseq data into a dataframe and then take the top X most abundant phyla. 
biom.melt <- psmelt(biom)
biom.melt.sorted <- biom.melt %>%
  group_by(Sample,Phylum) %>%
  summarize(PhyAbund = sum(Abundance))%>%
  group_by(Phylum)%>%
  summarize(MeanPhyAbund = mean(PhyAbund))%>%
  arrange(-MeanPhyAbund)
# List of nPhyla top most abundant phyla. 
nPhyla = 13
PhylumList <- biom.melt.sorted[1:nPhyla,1]
PhylumList <- PhylumList[is.na(PhylumList)==FALSE,]
PhylumList <- levels(droplevels(as.factor(PhylumList$Phylum)))
PhylumList
##  [1] "[Thermi]"         "Acidobacteria"    "Actinobacteria"  
##  [4] "Armatimonadetes"  "Bacteroidetes"    "Cyanobacteria"   
##  [7] "Gemmatimonadetes" "GN02"             "Planctomycetes"  
## [10] "Proteobacteria"   "SR1"              "Verrucomicrobia"
# Subset biom.melt for phyla. 
biom.subset <- subset_taxa(biom, Phylum %in% PhylumList)
biom.subset.melt <- psmelt(biom.subset)
biom.subset.melt.sorted = biom.subset.melt %>%
  group_by(Sample,Site,Date,Phylum) %>%
  summarize(PhyAbund = sum(Abundance))
# Summarize phylum abundances. 
biom.subset.melt.sorted.est <- summarySE(biom.subset.melt.sorted, measurevar="PhyAbund", groupvars=c("Site","Date", "Phylum"))
head(biom.subset.melt.sorted.est)
##    Site Date          Phylum N  PhyAbund        sd       se        ci
## 1 North  172        [Thermi] 3  8295.333  4982.265 2876.512 12376.632
## 2 North  172   Acidobacteria 3 12588.333  7980.641 4607.625 19825.011
## 3 North  172  Actinobacteria 3  9579.667  3509.989 2026.493  8719.297
## 4 North  172 Armatimonadetes 3  1996.667  1313.123  758.132  3261.979
## 5 North  172   Bacteroidetes 3 89137.667 16607.997 9588.632 41256.552
## 6 North  172   Cyanobacteria 3  8193.000  6304.118 3639.684 15660.297
biom.subset.melt.sorted.est$Date <- as.character(biom.subset.melt.sorted.est$Date)
biom.subset.melt.sorted.est$Date <- as.numeric(biom.subset.melt.sorted.est$Date)
# Plot top phyla abundances. 
p <- ggplot(biom.subset.melt.sorted.est, aes(x=Date, y=PhyAbund, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=PhyAbund-se, ymax=PhyAbund+se)) +  geom_line() + facet_wrap(~Phylum, ncol = 3, scales="free_y") + scale_colour_hue(h=c(300, 850))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + labs(x="Amendment",y="Relative abundance")

# Find top 30 classes and subset biom.relabund. 
sort.classes <- sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Class"], sum), TRUE)
top.classes <- sort.classes[1:30]
top.classes.list <- names(top.classes)
biom.relabund.subset = subset_taxa(biom.relabund, Class %in% top.classes.list)
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Class %in% as.factor(top.classes.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 38156 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 38156 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 38156 tips and 38142 internal nodes ]
relabund.top.classes <- psmelt(biom.relabund.subset.taxa)
relabund.top.classes.class <- relabund.top.classes%>%
  group_by(Sample, Class)%>%
  mutate(ClassAbundance = sum(Abundance))%>%
  distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Family, Class)
relabund.top.classes.class.print <- summarySE(relabund.top.classes.class, measurevar="ClassAbundance", groupvars=c("Class"))
relabund.top.classes.class.print[,c("Class","N","ClassAbundance")]
##                    Class    N ClassAbundance
## 1   [Chloracidobacteria]  156   0.0242780076
## 2         [Pedosphaerae]  312   0.0008576936
## 3          [Saprospirae]  156   0.2600740284
## 4         Acidimicrobiia  572   0.0142314956
## 5        Acidobacteria-6  156   0.0087173757
## 6         Actinobacteria 1092   0.0008919558
## 7    Alphaproteobacteria 1196   0.1189849423
## 8          Armatimonadia   52   0.0043355902
## 9            Bacteroidia  468   0.0016047794
## 10                 BD1-5   52   0.0036947445
## 11    Betaproteobacteria  728   0.1394166181
## 12           Chloroplast  156   0.0387461867
## 13            Clostridia  624   0.0008200079
## 14            Cytophagia  260   0.0479689304
## 15            Deinococci  156   0.0590344529
## 16   Deltaproteobacteria 1352   0.0199104462
## 17        Flavobacteriia  208   0.0494758417
## 18   Gammaproteobacteria 1664   0.0668946728
## 19      Gemmatimonadetes  156   0.0019508196
## 20                 OM190   52   0.0040714493
## 21                 OPB56   52   0.0007609416
## 22         Phycisphaerae  104   0.0016755586
## 23        Planctomycetia  260   0.0069005428
## 24          Solibacteres  208   0.0118832190
## 25      Sphingobacteriia  156   0.0073552263
## 26 Synechococcophycideae  260   0.0354155284
## 27                  TA18   52   0.0015786666
## 28                 TM7-1   52   0.0018488588
## 29      Verrucomicrobiae   52   0.0026770285
## 30                   ZB2   52   0.0007206090
# Summary of class abundance of top 30 classes. 
relabund.top.classes.class.est <- summarySE(relabund.top.classes.class, measurevar="ClassAbundance", groupvars=c("Site","Date", "Class"))
head(relabund.top.classes.class.est)
##    Site Date                Class  N ClassAbundance           sd
## 1 North  172 [Chloracidobacteria]  9    0.039306658 0.0176302507
## 2 North  172       [Pedosphaerae] 18    0.001325860 0.0001686127
## 3 North  172        [Saprospirae]  9    0.191751831 0.0190289005
## 4 North  172       Acidimicrobiia 33    0.038384578 0.0085857752
## 5 North  172      Acidobacteria-6  9    0.003818652 0.0006679931
## 6 North  172       Actinobacteria 63    0.003232369 0.0001236175
##             se           ci
## 1 5.876750e-03 1.355181e-02
## 2 3.974240e-05 8.384913e-05
## 3 6.342967e-03 1.462691e-02
## 4 1.494592e-03 3.044383e-03
## 5 2.226644e-04 5.134650e-04
## 6 1.557434e-05 3.113266e-05
relabund.top.classes.class.est$Date <- as.character(relabund.top.classes.class.est$Date)
relabund.top.classes.class.est$Date <- as.numeric(relabund.top.classes.class.est$Date)
# Plot summary of class abundance of top 30 classes. 
p <- ggplot(relabund.top.classes.class.est, aes(x=Date, y=ClassAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) +  geom_line() + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(300, 850))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find all classes
all.classes <- sort(get_taxa_unique(biom.relabund, "Class"), decreasing=FALSE)
biom.relabund.all.classes <- subset_taxa(biom.relabund, Class %in% as.factor(all.classes))
biom.relabund.all.classes <- psmelt(biom.relabund.all.classes)
biom.relabund.all.classes.class <- biom.relabund.all.classes%>%
  group_by(Sample, Class)%>%
  mutate(ClassAbundance = sum(Abundance))%>%
  distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Family, Class)
biom.relabund.all.classes.class.est <- summarySE(biom.relabund.all.classes.class, measurevar="ClassAbundance", groupvars=c("Site","Date", "Class"))
head(biom.relabund.all.classes.class.est)
##    Site Date                Class N ClassAbundance           sd
## 1 North  172       [Brachyspirae] 6   0.000000e+00 0.000000e+00
## 2 North  172       [Brevinematae] 3   0.000000e+00 0.000000e+00
## 3 North  172 [Chloracidobacteria] 9   3.930666e-02 1.763025e-02
## 4 North  172        [Cloacamonae] 6   0.000000e+00 0.000000e+00
## 5 North  172     [Fimbriimonadia] 3   2.686822e-05 1.191337e-05
## 6 North  172      [Lentisphaeria] 6   1.561697e-06 2.419371e-06
##             se           ci
## 1 0.000000e+00 0.000000e+00
## 2 0.000000e+00 0.000000e+00
## 3 5.876750e-03 1.355181e-02
## 4 0.000000e+00 0.000000e+00
## 5 6.878186e-06 2.959445e-05
## 6 9.877040e-07 2.538974e-06
biom.relabund.all.classes.class.est$Date <- as.character(biom.relabund.all.classes.class.est$Date)
biom.relabund.all.classes.class.est$Date <- as.numeric(biom.relabund.all.classes.class.est$Date)
p <- ggplot(biom.relabund.all.classes.class.est, aes(x=Date, y=ClassAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) +  geom_line() + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(300, 850))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Data downloaded from 
data <- read.csv("north_temperate_lakes_lter__daily_water_temperature_-_lake_mendota.csv", header=T)
head(data)
##   sampledate year4 month daynum depth  wtemp flag_wtemp
## 1 2006-06-27  2006     6    178   0.0 22.205          N
## 2 2006-06-27  2006     6    178   0.5 22.202          N
## 3 2006-06-27  2006     6    178   1.0 22.244          N
## 4 2006-06-27  2006     6    178   1.5 22.219          N
## 5 2006-06-27  2006     6    178   2.0 22.259          N
## 6 2006-06-27  2006     6    178   2.5 22.214          N
nolegend <- theme(legend.position="none")
Jdays <- c(172,178,185,199,206,214)
# Subset all wtemp at or below 1.5 meters (sample depth up to shore). 
clado.depth <- subset(data, depth<=1.5, select = year4:wtemp)
# All years (2006-2016), full year. 
p <- qplot(clado.depth$daynum, y = clado.depth$wtemp)
p <- p + geom_point(aes(colour = clado.depth$year4), size=1) + 
  scale_y_continuous(limits = c(4,30))+
  labs(title = "Water Temperature <1.5m Depth, Lake Mendota") + 
  xlab("Julian Day") + ylab("Temperature °C")+ 
  geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") +
  guides(color=guide_legend(title="Year")) + 
  scale_colour_gradient(low = "darkred")
p

# All years (2006-2016), sample dates ± 25 days
clado.depth.zoom <- subset(clado.depth, daynum>=150 & daynum<=250, select = year4:wtemp)
p <- qplot(clado.depth.zoom$daynum, y = clado.depth.zoom$wtemp) + 
  scale_y_continuous(limits = c(10,30))+
  geom_point(aes(colour = clado.depth.zoom$year4), size=1) + 
  labs(title = "Water Temperature <1.5m Depth, Lake Mendota", x="julian day", y="temperature °C") + 
  geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") + 
  guides(color=guide_legend(title="year")) + scale_colour_gradient(low = "darkred")
p

# Smarter way to make grid of plots. 
p <- ggplot(clado.depth.zoom, aes(daynum, wtemp))
p <- p + geom_point(size = 1) + ylim(15,30) + facet_wrap(~year4) + geom_vline(xintercept=Jdays, colour="grey30", linetype = "longdash") + guides(color=guide_legend(title="Year"))
p